#!/bin/bash

# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------

# BED file(s), relative to Dirname, passed to `bedtools intersect -b`.
# The main loop runs once per entry; an indexed array keeps the iteration
# order identical to the declaration order.
declare -a tab_secondfile=()
tab_secondfile[0]=intersect_UT.bed

# Each setting below keeps a value exported in the environment; otherwise it
# falls back to the default written after `:-`. Fill in that default (or
# export the variable before running the script).
# NOTE: the instructions must stay in comments. Text placed directly after
# `NAME=` is executed by Bash as a command and leaves NAME unset.

# Path to your working directory (holds the inputs, receives the outputs).
Dirname="${Dirname:-}"

# Name of the file containing your up-regulated data (relative to Dirname).
InputFile_Up="${InputFile_Up:-}"
# Name of the file containing your down-regulated data (relative to Dirname).
InputFile_Down="${InputFile_Down:-}"

# Annotation files (relative to Dirname); both are joined on their 4th column.
whole_symbolegene=whole_symbolegene.bed
Last_exon_symbolegene=last_exon_symbolegene.bed

#######################################
# Reshape one regulated-peaks table, attach last-exon and whole-gene
# coordinates to it, and intersect the result with a BED file.
#
# The text-processing programs (perl/awk/sed) are kept verbatim from the
# original UP and DOWN sections, which were exact duplicates of each other.
#
# Globals:
#   Dirname               (read) working directory; must be non-empty
#   Last_exon_symbolegene (read) annotation file joined on its 4th column
#   whole_symbolegene     (read) annotation file joined on its 4th column
# Arguments:
#   $1 - input table name, relative to Dirname
#   $2 - BED file name given to `bedtools intersect -b`, relative to Dirname
#   $3 - path of the intersect output file
# Outputs:
#   stdout: number of distinct 4th-column values in <input>_tmp2.bed and in
#           <input>_tmp4.bed, then "Done!!", the output path and the number
#           of distinct 4th-column values in the output file.
#   Files:  writes $3; the scratch files tmp0/tmp1/tmp2 and
#           <input>_tmp{2,3,4,5}.bed are created in Dirname and removed.
# Returns:
#   1 if one of the required input files cannot be read, 0 otherwise.
#######################################
annotate_and_intersect() {
	local input_name=$1
	local second_bed=$2
	local output_bed=$3

	# With an empty Dirname every path below would point at the filesystem root.
	: "${Dirname:?Dirname is empty - set the working directory first}"

	local stem="${Dirname}/${input_name}"

	local required
	for required in "${stem}" \
		"${Dirname}/${Last_exon_symbolegene}" \
		"${Dirname}/${whole_symbolegene}" \
		"${Dirname}/${second_bed}"; do
		if [[ ! -r "${required}" ]]; then
			printf 'Error: cannot read %s\n' "${required}" >&2
			return 1
		fi
	done

	# Drop every line containing "baseMean" (presumably the header - confirm
	# against the input format), keep Perl fields 0-6 and 18-22, split the first
	# column on "|", ":", "-" and "_", strip "|", spaces and "mpeak", then sort
	# on column 1 and numerically on column 2.
	grep -v "baseMean" -- "${stem}" \
		| perl -nale '{print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]\t$F[5]\t$F[6]\t$F[18]\t$F[19]\t$F[20]\t$F[21]\t$F[22]";}' \
		| awk  '{gsub(/\|/,"\t",$1);print$1}' \
		| awk '{print $1,"\t",$2"|"$3}' \
		| sed "s/:/\t/g" \
		| awk  '{gsub(/\-/,"\t",$2);print$1,"\t",$2,"\t",$3,"\t",$4,"\t",$4}' \
		| awk  '{gsub(/\_/,"\t",$6);print$1,"\t",$2,"\t",$3,"\t",$4,"\t",$5,"\t",$6}' \
		| cut -f 1,2,3,4,5,11 \
		| sed "s/|//g" \
		| sed "s/ //g" \
		| awk  '{gsub(/\mpeak/,"",$6);print$1,"\t",$2,"\t",$3,"\t",$4,"\t",$5,"\t",$6}' \
		| sort -k 1,1 -k2,2n > "${Dirname}/tmp0"

	# Rows whose 8th tab-separated field is non-empty after the split: emitted
	# twice, once as-is and once with a name rebuilt from fields 1 and 8-12.
	awk  '{gsub(/mpeak/,"\t",$5);print$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7}' < "${Dirname}/tmp0" \
		| awk 'BEGIN{FS="\t";}{if(NF>1 && $6!="") gsub(/_/,"\t",$6);print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6;}' \
		| awk 'BEGIN{FS="\t";}{if(NF>1 && $8!="") print $1"\t"$2"\t"$3"\t"$4"\t"$5"\n"$1"\t"$2"\t"$3"\t"$4"\t"$1"_"$8"_"$9"_"$10"_"$11"_"$12;}' > "${Dirname}/tmp1"

	# Rows whose 8th tab-separated field is empty: kept unchanged (5 columns).
	awk  '{gsub(/mpeak/,"\t",$5);print$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7}' < "${Dirname}/tmp0" \
		| awk 'BEGIN{FS="\t";}{if(NF>1 && $6!="") gsub(/_/,"\t",$6);print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6;}' \
		| awk 'BEGIN{FS="\t";}{if(NF>1 && $8=="") print $1"\t"$2"\t"$3"\t"$4"\t"$5;}' > "${Dirname}/tmp2"

	# Merge both sets, de-duplicate, and append the "<col1>|<x>_<y>" key
	# (column 6) that the last-exon join below matches on.
	cat "${Dirname}/tmp1" "${Dirname}/tmp2" \
		| sort \
		| uniq \
		| awk  '{print$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$5}' \
		| awk  '{gsub(/\_/,"\t",$6);print$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6}' \
		| awk  '{print$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$1"|"$9"_"$10"\t"$11}' > "${stem}_tmp2.bed"

	rm -f -- "${Dirname}/tmp0" "${Dirname}/tmp1" "${Dirname}/tmp2"
	cut -f 4 "${stem}_tmp2.bed" | sort | uniq | wc -l

	######################################
	###### Join peaks and the LE
	######################################

	join -1 6 -2 4 -o 1.1,1.2,1.3,1.4,1.5,1.6,1.7,2.1,2.2,2.3,2.4,2.5 \
		<(sort -k 6,6 "${stem}_tmp2.bed") \
		<(sort -k 4,4 "${Dirname}/${Last_exon_symbolegene}") \
		| sed "s/ /\t/g" \
		| sed "s/|/\t/g" > "${stem}_tmp3.bed"

	awk '{print $9"\t"$10"\t"$11"\t"$4"\t"$5"\t"$14}' "${stem}_tmp3.bed" \
		| sort \
		| uniq \
		| awk '{gsub(/\_/,"\t",$5);print$1"\t"$2"\t"$3"\t"$4"\t"$5}' \
		| awk  '{print$1"\t"$2"\t"$3"\t"$4"\t"$5"|"$8"_"$9"\t"$10}' > "${stem}_tmp4.bed"

	######################################
	###### Join peaks and the coordinate of gene
	######################################

	join -1 5 -2 4 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.1,2.2,2.3,2.4,2.5 \
		<(sort -k 5,5 "${stem}_tmp4.bed") \
		<(sort -k 4,4 "${Dirname}/${whole_symbolegene}") \
		| sed "s/ /\t/g" \
		| sed "s/|/\t/g" \
		| sort \
		| uniq \
		| awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$8":"$9"_"$10"_"$12"_"$13"\t"$13}' > "${stem}_tmp5.bed"

	cut -f 4 "${stem}_tmp4.bed" | sort | uniq | wc -l

	###################################################
	######## Bedtools_Intersect              ##########
	###################################################

	bedtools intersect -a "${stem}_tmp5.bed" -b "${Dirname}/${second_bed}" -s -wa -wb > "${output_bed}"

	# Remove the intermediates of THIS input. The original only did so for the
	# DOWN table, leaving the UP intermediates behind in Dirname.
	rm -f -- "${stem}_tmp2.bed" "${stem}_tmp3.bed" "${stem}_tmp4.bed" "${stem}_tmp5.bed"

	echo -e "Done!!"
	echo "Nb peak in :"
	echo "${output_bed}"

	local PI nb_peaks
	PI=$(cut -f 4 "${output_bed}" | sort | uniq | wc -l)
	# Arithmetic expansion strips the padding some `wc` implementations add.
	nb_peaks=$((PI))
	echo "${nb_peaks}"
}

# Run the UP and DOWN tables against every BED file listed in tab_secondfile.
for Bedtools_fb in "${tab_secondfile[@]}"
do
	# Abort before touching the filesystem when the configuration is incomplete:
	# an empty Dirname would make the script write and delete files under "/".
	: "${Dirname:?Dirname is empty - set the path to your working directory}"
	: "${InputFile_Up:?InputFile_Up is empty - set the up-regulated data file name}"
	: "${InputFile_Down:?InputFile_Down is empty - set the down-regulated data file name}"

	echo "@@"
	echo "${Bedtools_fb}"

	# Output names combine the input name and the BED name, both without
	# their last extension.
	Output_Up="${Dirname}/Whole_LE/whole_LE_${InputFile_Up%.*}_IN_${Bedtools_fb%.*}.bed"
	Output_Down="${Dirname}/Whole_LE/whole_LE_${InputFile_Down%.*}_IN_${Bedtools_fb%.*}.bed"

	# `mkdir -p` succeeds when the directory already exists.
	mkdir -p -- "${Dirname}/Whole_LE" || exit 1

	echo "*********** For UP ************"
	InputFile=${InputFile_Up}
	echo "${InputFile}"
	annotate_and_intersect "${InputFile}" "${Bedtools_fb}" "${Output_Up}"

	echo "**************  For DOWN ************"
	InputFile=${InputFile_Down}
	annotate_and_intersect "${InputFile}" "${Bedtools_fb}" "${Output_Down}"

done
